home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 2 / AACD 2.iso / AACD / Magazine / GraphicsCards / StormMesa / src / vbxform_asmppc.p < prev    next >
Text File  |  1999-02-04  |  58KB  |  1,774 lines

  1. ;
  2. ; Mesa 3-D graphics library
  3. ; Version:  2.5
  4. ; Copyright (C) 1995-1997  Brian Paul
  5. ;
  6. ; This library is free software; you can redistribute it and/or
  7. ; modify it under the terms of the GNU Library General Public
  8. ; License as published by the Free Software Foundation; either
  9. ; version 2 of the License, or (at your option) any later version.
  10. ;
  11. ; This library is distributed in the hope that it will be useful,
  12. ; but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. ; Library General Public License for more details.
  15. ;
  16. ; You should have received a copy of the GNU Library General Public
  17. ; License along with this library; if not, write to the Free
  18. ; Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19. ;
  20.  
  21. ;   vbxform_asmppc.p
  22. ;   16.5.1998 by Sam Jordan
  23. ;
  24. ;   PowerPC assembler optimizations of several functions in vbxform.c
  25. ;   Originally written for AMIGA OS/PowerOpen. To use this source on other
  26. ;   PowerPC based platforms or with other programming models, some
  27. ;   modifications might be needed.
  28.  
  29. CLIP_RIGHT_BIT  =       $01                 ;cx >  cw
  30. CLIP_LEFT_BIT   =       $02                 ;cx < -cw
  31. CLIP_TOP_BIT    =       $04                 ;cy >  cw
  32. CLIP_BOTTOM_BIT =       $08                 ;cy < -cw
  33. CLIP_NEAR_BIT   =       $10                 ;cz < -cw
  34. CLIP_FAR_BIT    =       $20                 ;cz >  cw
  35. ;   The CLIP_*_BIT values above are OR-ed together into a per-vertex clip mask.
  36. ;   Entry points made visible to other modules:
  37.                 XDEF    _asm_transform_points3_general
  38.                 XDEF    _asm_transform_points3_identity
  39.                 XDEF    _asm_transform_points3_2d
  40.                 XDEF    _asm_transform_points3_2d_no_rot
  41.                 XDEF    _asm_transform_points3_3d
  42.                 XDEF    _asm_transform_points4_general
  43.                 XDEF    _asm_transform_points4_identity
  44.                 XDEF    _asm_transform_points4_2d
  45.                 XDEF    _asm_transform_points4_2d_no_rot
  46.                 XDEF    _asm_transform_points4_3d
  47.                 XDEF    _asm_project_and_cliptest_general
  48.                 XDEF    _asm_project_and_cliptest_identity
  49.                 XDEF    _asm_project_and_cliptest_ortho
  50.                 XDEF    _asm_project_and_cliptest_perspective
  51.  
  52.                 XDEF    _asm_vp_map_vertices_now
  53.                 XDEF    _asm_vp_map_vertices
  54.  
  55.  
  56.  
  57. _asm_transform_points3_general
  58. ;   Transform n points (x,y,z with an implied w of 1) by a full 4x4 matrix.
  59. ;            const GLfloat *m = ctx->ModelViewMatrix;
  60. ;            GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
  61. ;            GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
  62. ;            GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
  63. ;            GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
  64. ;            GLuint i;
  65. ;            for (i=0;i<n;i++) {
  66. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1], oz = vObj[i][2];
  67. ;               vEye[i][0] = m0 * ox + m4 * oy + m8  * oz + m12;
  68. ;               vEye[i][1] = m1 * ox + m5 * oy + m9  * oz + m13;
  69. ;               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14;
  70. ;               vEye[i][3] = m3 * ox + m7 * oy + m11 * oz + m15;
  71. ;            }
  72. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer,
  73. ;   r6 = input pointer. f0-f15 hold m[0]-m[15]; f19-f22 are the four sums.
  74.                 stfd    f14,-1*8(r1)            ;save f14-f22 at negative offsets from r1 (r1 is not moved)
  75.                 stfd    f15,-2*8(r1)
  76.                 stfd    f16,-3*8(r1)
  77.                 stfd    f17,-4*8(r1)
  78.                 stfd    f18,-5*8(r1)
  79.                 stfd    f19,-6*8(r1)
  80.                 stfd    f20,-7*8(r1)
  81.                 stfd    f21,-8*8(r1)
  82.                 stfd    f22,-9*8(r1)
  83.                 mr.     r3,r3                   ;set CR0 from n
  84.                 beq     .end                    ;n == 0: only restore registers and return
  85.                 subi    r5,r5,4                 ;pre-bias: every lfsu adds 4 before loading
  86.                 lfsu    f0,4(r5)                ;f0..f15 = m[0]..m[15]
  87.                 lfsu    f1,4(r5)
  88.                 lfsu    f2,4(r5)
  89.                 lfsu    f3,4(r5)
  90.                 lfsu    f4,4(r5)
  91.                 lfsu    f5,4(r5)
  92.                 lfsu    f6,4(r5)
  93.                 lfsu    f7,4(r5)
  94.                 lfsu    f8,4(r5)
  95.                 lfsu    f9,4(r5)
  96.                 lfsu    f10,4(r5)
  97.                 lfsu    f11,4(r5)
  98.                 lfsu    f12,4(r5)
  99.                 lfsu    f13,4(r5)
  100.                 lfsu    f14,4(r5)
  101.                 lfsu    f15,4(r5)
  102.                 mtctr   r3                      ;CTR = n, counted down by bdnz
  103.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  104.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu
  105. .loop
  106.                 lfsu    f16,4(r6)               ;f16 = ox
  107.                 fmadds  f19,f0,f16,f12          ;f19 = m0*ox + m12
  108.                 fmadds  f20,f1,f16,f13          ;f20 = m1*ox + m13
  109.                 lfsu    f17,4(r6)               ;f17 = oy
  110.                 fmadds  f21,f2,f16,f14          ;f21 = m2*ox + m14
  111.                 fmadds  f22,f3,f16,f15          ;f22 = m3*ox + m15
  112.                 fmadds  f19,f4,f17,f19          ;f19 += m4*oy
  113.                 fmadds  f20,f5,f17,f20          ;f20 += m5*oy
  114.                 lfsu    f18,4(r6)               ;f18 = oz
  115.                 fmadds  f21,f6,f17,f21          ;f21 += m6*oy
  116.                 fmadds  f22,f7,f17,f22          ;f22 += m7*oy
  117.                 addi    r6,r6,4                 ;step over the unread 4th input float
  118.                 fmadds  f19,f8,f18,f19          ;f19 += m8*oz
  119.                 stfsu   f19,4(r4)               ;vEye[i][0]
  120.                 fmadds  f20,f9,f18,f20          ;f20 += m9*oz
  121.                 stfsu   f20,4(r4)               ;vEye[i][1]
  122.                 fmadds  f21,f10,f18,f21         ;f21 += m10*oz
  123.                 stfsu   f21,4(r4)               ;vEye[i][2]
  124.                 fmadds  f22,f11,f18,f22         ;f22 += m11*oz
  125.                 stfsu   f22,4(r4)               ;vEye[i][3]
  126.                 bdnz    .loop                   ;next point until CTR reaches 0
  127. .end
  128.                 lfd     f22,-9*8(r1)            ;restore f22-f14 from the slots written at entry
  129.                 lfd     f21,-8*8(r1)
  130.                 lfd     f20,-7*8(r1)
  131.                 lfd     f19,-6*8(r1)
  132.                 lfd     f18,-5*8(r1)
  133.                 lfd     f17,-4*8(r1)
  134.                 lfd     f16,-3*8(r1)
  135.                 lfd     f15,-2*8(r1)
  136.                 lfd     f14,-1*8(r1)
  137.                 blr
  138.  
  139.  
  140.  
  141.  
  142.  
  143. _asm_transform_points3_identity
  144. ;   Copy x,y,z of n points unchanged and write 1.0 as the 4th output value.
  145. ;            GLuint i;
  146. ;            for (i=0;i<n;i++) {
  147. ;               vEye[i][0] = vObj[i][0];
  148. ;               vEye[i][1] = vObj[i][1];
  149. ;               vEye[i][2] = vObj[i][2];
  150. ;               vEye[i][3] = 1.0F;
  151. ;            }
  152. ;   As used below: r3 = n, r4 = output pointer, r5 = input pointer, r0 = copy temp.
  153.                 mr.     r3,r3                   ;set CR0 from n
  154.                 beq     .end                    ;n == 0: nothing to copy
  155.                 subi    r4,r4,4                 ;pre-bias output pointer for stwu
  156.                 subi    r5,r5,4                 ;pre-bias input pointer for lwzu
  157.                 mtctr   r3                      ;CTR = n
  158.                 lis     r6,$3f80                ;r6 = $3f800000, the IEEE single bit pattern of 1.0
  159. .loop
  160.                 lwzu    r0,4(r5)                ;x is copied as a raw 32-bit word (no FPU involved)
  161.                 stwu    r0,4(r4)
  162.                 lwzu    r0,4(r5)                ;y
  163.                 stwu    r0,4(r4)
  164.                 lwzu    r0,4(r5)                ;z
  165.                 stwu    r0,4(r4)
  166.                 addi    r5,r5,4                 ;step over the unread 4th input word
  167.                 stwu    r6,4(r4)                ;vEye[i][3] = 1.0
  168.                 bdnz    .loop                   ;next point until CTR reaches 0
  169. .end
  170.                 blr
  171.  
  172.  
  173.  
  174.  
  175.  
  176. _asm_transform_points3_2d
  177. ;   Transform x,y of n points with the 2D part of the matrix (m0,m1,m4,m5 plus
  178. ;   translation m12,m13); z is passed through and w is written as 1.0.
  179. ;            const GLfloat *m = ctx->ModelViewMatrix;
  180. ;            GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
  181. ;            GLfloat m12 = m[12], m13 = m[13];
  182. ;            GLuint i;
  183. ;            for (i=0;i<n;i++) {
  184. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1], oz = vObj[i][2];
  185. ;               vEye[i][0] = m0 * ox + m4 * oy            + m12       ;
  186. ;               vEye[i][1] = m1 * ox + m5 * oy            + m13       ;
  187. ;               vEye[i][2] =                   +       oz             ;
  188. ;               vEye[i][3] =                                      1.0F;
  189. ;            }
  190. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer, r6 = input pointer.
  191.                 mr.     r3,r3                   ;set CR0 from n
  192.                 beq     .end                    ;n == 0: nothing to transform
  193.                 lfs     f0,0*4(r5)              ;f0 = m0
  194.                 lfs     f1,1*4(r5)              ;f1 = m1
  195.                 lfs     f4,4*4(r5)              ;f4 = m4
  196.                 lfs     f5,5*4(r5)              ;f5 = m5
  197.                 lfs     f12,12*4(r5)            ;f12 = m12
  198.                 lfs     f13,13*4(r5)            ;f13 = m13
  199.                 mtctr   r3                      ;CTR = n
  200.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  201.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu/stwu
  202.                 lis     r7,$3f80                ;r7 = $3f800000, the IEEE single bit pattern of 1.0
  203. .loop
  204.                 lfsu    f6,4(r6)                ;f6 = ox
  205.                 fmadds  f2,f0,f6,f12            ;f2 = m0*ox + m12
  206.                 fmadds  f3,f1,f6,f13            ;f3 = m1*ox + m13
  207.                 lfsu    f7,4(r6)                ;f7 = oy
  208.                 fmadds  f2,f4,f7,f2             ;f2 += m4*oy
  209.                 lfsu    f8,4(r6)                ;f8 = oz
  210.                 fmadds  f3,f5,f7,f3             ;f3 += m5*oy
  211.                 stfsu   f2,4(r4)                ;vEye[i][0]
  212.                 stfsu   f3,4(r4)                ;vEye[i][1]
  213.                 stfsu   f8,4(r4)                ;vEye[i][2] = oz
  214.                 addi    r6,r6,4                 ;step over the unread 4th input float
  215.                 stwu    r7,4(r4)                ;vEye[i][3] = 1.0
  216.                 bdnz    .loop                   ;next point until CTR reaches 0
  217. .end
  218.                 blr
  219.  
  220.  
  221.  
  222.  
  223.  
  224. _asm_transform_points3_2d_no_rot
  225. ;   Scale and translate x,y of n points (m0,m5,m12,m13); z passes through, w = 1.0.
  226. ;            const GLfloat *m = ctx->ModelViewMatrix;
  227. ;            GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
  228. ;            GLuint i;
  229. ;            for (i=0;i<n;i++) {
  230. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1], oz = vObj[i][2];
  231. ;               vEye[i][0] = m0 * ox                      + m12       ;
  232. ;               vEye[i][1] =           m5 * oy            + m13       ;
  233. ;               vEye[i][2] =                   +       oz             ;
  234. ;               vEye[i][3] =                                      1.0F;
  235. ;            }
  236. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer, r6 = input pointer.
  237.                 mr.     r3,r3                   ;set CR0 from n
  238.                 beq     .end                    ;n == 0: nothing to transform
  239.                 lfs     f0,0*4(r5)              ;f0 = m0
  240.                 lfs     f5,5*4(r5)              ;f5 = m5
  241.                 lfs     f12,12*4(r5)            ;f12 = m12
  242.                 lfs     f13,13*4(r5)            ;f13 = m13
  243.                 mtctr   r3                      ;CTR = n
  244.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  245.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu/stwu
  246.                 lis     r7,$3f80                ;r7 = $3f800000, the IEEE single bit pattern of 1.0
  247. .loop
  248.                 lfsu    f6,4(r6)                ;f6 = ox
  249.                 fmadds  f2,f0,f6,f12            ;f2 = m0*ox + m12
  250.                 lfsu    f7,4(r6)                ;f7 = oy
  251.                 fmadds  f3,f5,f7,f13            ;f3 = m5*oy + m13
  252.                 lfsu    f8,4(r6)                ;f8 = oz
  253.                 stfsu   f2,4(r4)                ;vEye[i][0]
  254.                 stfsu   f3,4(r4)                ;vEye[i][1]
  255.                 stfsu   f8,4(r4)                ;vEye[i][2] = oz
  256.                 addi    r6,r6,4                 ;step over the unread 4th input float
  257.                 stwu    r7,4(r4)                ;vEye[i][3] = 1.0
  258.                 bdnz    .loop                   ;next point until CTR reaches 0
  259. .end
  260.                 blr
  261.  
  262.  
  263.  
  264.  
  265.  
  266. _asm_transform_points3_3d
  267. ;   Transform n points (x,y,z) by the upper three matrix rows plus translation;
  268. ;   m3, m7, m11 and m15 are not loaded and w is written as 1.0.
  269. ;            const GLfloat *m = ctx->ModelViewMatrix;
  270. ;            GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
  271. ;            GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
  272. ;            GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
  273. ;            GLuint i;
  274. ;            for (i=0;i<n;i++) {
  275. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1], oz = vObj[i][2];
  276. ;               vEye[i][0] = m0 * ox + m4 * oy +  m8 * oz + m12       ;
  277. ;               vEye[i][1] = m1 * ox + m5 * oy +  m9 * oz + m13       ;
  278. ;               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14       ;
  279. ;               vEye[i][3] =                                      1.0F;
  280. ;            }
  281. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer, r6 = input pointer.
  282.                 stfd    f14,-1*8(r1)            ;save f14-f17 at negative offsets from r1 (r1 is not moved)
  283.                 stfd    f15,-2*8(r1)
  284.                 stfd    f16,-3*8(r1)
  285.                 stfd    f17,-4*8(r1)
  286.                 mr.     r3,r3                   ;set CR0 from n
  287.                 beq     .end                    ;n == 0: only restore registers and return
  288.                 subi    r5,r5,4                 ;pre-bias: every lfsu adds 4 before loading
  289.                 lfsu    f0,4(r5)                ;f0 = m0
  290.                 lfsu    f1,4(r5)                ;f1 = m1
  291.                 lfsu    f2,4(r5)                ;f2 = m2
  292.                 addi    r5,r5,4                 ;skip m3
  293.                 lfsu    f4,4(r5)                ;f4 = m4
  294.                 lfsu    f5,4(r5)                ;f5 = m5
  295.                 lfsu    f6,4(r5)                ;f6 = m6
  296.                 addi    r5,r5,4                 ;skip m7
  297.                 lfsu    f8,4(r5)                ;f8 = m8
  298.                 lfsu    f9,4(r5)                ;f9 = m9
  299.                 lfsu    f10,4(r5)               ;f10 = m10
  300.                 addi    r5,r5,4                 ;skip m11
  301.                 lfsu    f12,4(r5)               ;f12 = m12
  302.                 lfsu    f13,4(r5)               ;f13 = m13
  303.                 lfsu    f14,4(r5)               ;f14 = m14
  304.                 mtctr   r3                      ;CTR = n
  305.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  306.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu/stwu
  307.                 lis     r7,$3f80                ;r7 = $3f800000, the IEEE single bit pattern of 1.0
  308. .loop
  309.                 lfsu    f3,4(r6)                ;f3 = ox
  310.                 fmadds  f15,f0,f3,f12           ;f15 = m0*ox + m12
  311.                 fmadds  f16,f1,f3,f13           ;f16 = m1*ox + m13
  312.                 lfsu    f7,4(r6)                ;f7 = oy
  313.                 fmadds  f17,f2,f3,f14           ;f17 = m2*ox + m14
  314.                 fmadds  f15,f4,f7,f15           ;f15 += m4*oy
  315.                 lfsu    f11,4(r6)               ;f11 = oz
  316.                 fmadds  f16,f5,f7,f16           ;f16 += m5*oy
  317.                 fmadds  f17,f6,f7,f17           ;f17 += m6*oy
  318.                 fmadds  f15,f8,f11,f15          ;f15 += m8*oz
  319.                 stfsu   f15,4(r4)               ;vEye[i][0]
  320.                 fmadds  f16,f9,f11,f16          ;f16 += m9*oz
  321.                 addi    r6,r6,4                 ;step over the unread 4th input float
  322.                 stfsu   f16,4(r4)               ;vEye[i][1]
  323.                 fmadds  f17,f10,f11,f17         ;f17 += m10*oz
  324.                 stfsu   f17,4(r4)               ;vEye[i][2]
  325.                 stwu    r7,4(r4)                ;vEye[i][3] = 1.0
  326.                 bdnz    .loop                   ;next point until CTR reaches 0
  327. .end
  328.                 lfd     f17,-4*8(r1)            ;restore f17-f14 from the slots written at entry
  329.                 lfd     f16,-3*8(r1)
  330.                 lfd     f15,-2*8(r1)
  331.                 lfd     f14,-1*8(r1)
  332.                 blr
  333.  
  334.  
  335.  
  336.  
  337.  
  338.  
  339.  
  340. _asm_transform_points4_general
  341. ;   Transform n points (x,y,z,w) by a full 4x4 matrix.
  342. ;            const GLfloat *m = ctx->ModelViewMatrix;
  343. ;            GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
  344. ;            GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
  345. ;            GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
  346. ;            GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
  347. ;            GLuint i;
  348. ;            for (i=0;i<n;i++) {
  349. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1];
  350. ;               GLfloat oz = vObj[i][2], ow = vObj[i][3];
  351. ;               vEye[i][0] = m0 * ox + m4 * oy + m8  * oz + m12 * ow;
  352. ;               vEye[i][1] = m1 * ox + m5 * oy + m9  * oz + m13 * ow;
  353. ;               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
  354. ;               vEye[i][3] = m3 * ox + m7 * oy + m11 * oz + m15 * ow;
  355. ;            }
  356. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer, r6 = input pointer.
  357.                 stfd    f14,-1*8(r1)            ;save f14-f23 at negative offsets from r1 (r1 is not moved)
  358.                 stfd    f15,-2*8(r1)
  359.                 stfd    f16,-3*8(r1)
  360.                 stfd    f17,-4*8(r1)
  361.                 stfd    f18,-5*8(r1)
  362.                 stfd    f19,-6*8(r1)
  363.                 stfd    f20,-7*8(r1)
  364.                 stfd    f21,-8*8(r1)
  365.                 stfd    f22,-9*8(r1)
  366.                 stfd    f23,-10*8(r1)
  367.                 mr.     r3,r3                   ;set CR0 from n
  368.                 beq     .end                    ;n == 0: only restore registers and return
  369.                 subi    r5,r5,4                 ;pre-bias: every lfsu adds 4 before loading
  370.                 lfsu    f0,4(r5)                ;f0..f15 = m[0]..m[15]
  371.                 lfsu    f1,4(r5)
  372.                 lfsu    f2,4(r5)
  373.                 lfsu    f3,4(r5)
  374.                 lfsu    f4,4(r5)
  375.                 lfsu    f5,4(r5)
  376.                 lfsu    f6,4(r5)
  377.                 lfsu    f7,4(r5)
  378.                 lfsu    f8,4(r5)
  379.                 lfsu    f9,4(r5)
  380.                 lfsu    f10,4(r5)
  381.                 lfsu    f11,4(r5)
  382.                 lfsu    f12,4(r5)
  383.                 lfsu    f13,4(r5)
  384.                 lfsu    f14,4(r5)
  385.                 lfsu    f15,4(r5)
  386.                 mtctr   r3                      ;CTR = n
  387.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  388.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu
  389. .loop
  390.                 lfsu    f16,4(r6)               ;f16 = ox
  391.                 fmuls   f19,f0,f16              ;f19 = m0*ox
  392.                 fmuls   f20,f1,f16              ;f20 = m1*ox
  393.                 lfsu    f17,4(r6)               ;f17 = oy
  394.                 fmuls   f21,f2,f16              ;f21 = m2*ox
  395.                 fmuls   f22,f3,f16              ;f22 = m3*ox
  396.                 fmadds  f19,f4,f17,f19          ;f19 += m4*oy
  397.                 lfsu    f18,4(r6)               ;f18 = oz
  398.                 fmadds  f20,f5,f17,f20          ;f20 += m5*oy
  399.                 fmadds  f21,f6,f17,f21          ;f21 += m6*oy
  400.                 fmadds  f22,f7,f17,f22          ;f22 += m7*oy
  401.                 lfsu    f23,4(r6)               ;f23 = ow
  402.                 fmadds  f19,f8,f18,f19          ;f19 += m8*oz
  403.                 fmadds  f20,f9,f18,f20          ;f20 += m9*oz
  404.                 fmadds  f21,f10,f18,f21         ;f21 += m10*oz
  405.                 fmadds  f22,f11,f18,f22         ;f22 += m11*oz
  406.                 fmadds  f19,f12,f23,f19         ;f19 += m12*ow
  407.                 stfsu   f19,4(r4)               ;vEye[i][0]
  408.                 fmadds  f20,f13,f23,f20         ;f20 += m13*ow
  409.                 stfsu   f20,4(r4)               ;vEye[i][1]
  410.                 fmadds  f21,f14,f23,f21         ;f21 += m14*ow
  411.                 stfsu   f21,4(r4)               ;vEye[i][2]
  412.                 fmadds  f22,f15,f23,f22         ;f22 += m15*ow
  413.                 stfsu   f22,4(r4)               ;vEye[i][3]
  414.                 bdnz    .loop                   ;next point until CTR reaches 0
  415. .end
  416.                 lfd     f23,-10*8(r1)           ;restore f23-f14 from the slots written at entry
  417.                 lfd     f22,-9*8(r1)
  418.                 lfd     f21,-8*8(r1)
  419.                 lfd     f20,-7*8(r1)
  420.                 lfd     f19,-6*8(r1)
  421.                 lfd     f18,-5*8(r1)
  422.                 lfd     f17,-4*8(r1)
  423.                 lfd     f16,-3*8(r1)
  424.                 lfd     f15,-2*8(r1)
  425.                 lfd     f14,-1*8(r1)
  426.                 blr
  427.  
  428.  
  429.  
  430.  
  431.  
  432. _asm_transform_points4_identity
  433. ;   Copy all four components of n points unchanged from input to output.
  434. ;            GLuint i;
  435. ;            for (i=0;i<n;i++) {
  436. ;               vEye[i][0] = vObj[i][0];
  437. ;               vEye[i][1] = vObj[i][1];
  438. ;               vEye[i][2] = vObj[i][2];
  439. ;               vEye[i][3] = vObj[i][3];
  440. ;            }
  441. ;   As used below: r3 = n, r4 = output pointer, r5 = input pointer, r0 = copy temp.
  442.                 mr.     r3,r3                   ;set CR0 from n
  443.                 beq     .end                    ;n == 0: nothing to copy
  444.                 subi    r4,r4,4                 ;pre-bias output pointer for stwu
  445.                 subi    r5,r5,4                 ;pre-bias input pointer for lwzu
  446.                 mtctr   r3                      ;CTR = n
  447. .loop
  448.                 lwzu    r0,4(r5)                ;x is copied as a raw 32-bit word (no FPU involved)
  449.                 stwu    r0,4(r4)
  450.                 lwzu    r0,4(r5)                ;y
  451.                 stwu    r0,4(r4)
  452.                 lwzu    r0,4(r5)                ;z
  453.                 stwu    r0,4(r4)
  454.                 lwzu    r0,4(r5)                ;w
  455.                 stwu    r0,4(r4)
  456.                 bdnz    .loop                   ;next point until CTR reaches 0
  457. .end
  458.                 blr
  459.  
  460.  
  461.  
  462.  
  463.  
  464. _asm_transform_points4_2d
  465. ;   Transform x,y of n points (x,y,z,w) with m0,m1,m4,m5 and the w-scaled
  466. ;   translation m12,m13; z and w are passed through unchanged.
  467. ;            const GLfloat *m = ctx->ModelViewMatrix;
  468. ;            GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
  469. ;            GLfloat m12 = m[12], m13 = m[13];
  470. ;            GLuint i;
  471. ;            for (i=0;i<n;i++) {
  472. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1];
  473. ;               GLfloat oz = vObj[i][2], ow = vObj[i][3];
  474. ;               vEye[i][0] = m0 * ox + m4 * oy            + m12 * ow;
  475. ;               vEye[i][1] = m1 * ox + m5 * oy            + m13 * ow;
  476. ;               vEye[i][2] =                   +       oz           ;
  477. ;               vEye[i][3] =                                      ow;
  478. ;            }
  479.                 mr.     r3,r3                   ;set CR0 from n (r3 = point count)
  480.                 beq     .end                    ;n == 0: nothing to transform
  481.                 lfs     f0,0*4(r5)              ;f0 = m0 (r5 = matrix pointer)
  482.                 lfs     f1,1*4(r5)              ;f1 = m1
  483.                 lfs     f4,4*4(r5)              ;f4 = m4
  484.                 lfs     f5,5*4(r5)              ;f5 = m5
  485.                 lfs     f12,12*4(r5)            ;f12 = m12
  486.                 lfs     f13,13*4(r5)            ;f13 = m13
  487.                 mtctr   r3                      ;CTR = n
  488.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  489.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu
  490. .loop
  491.                 lfsu    f6,4(r6)                ;f6 = ox
  492.                 fmuls   f2,f0,f6                ;f2 = m0*ox
  493.                 lfsu    f7,4(r6)                ;f7 = oy
  494.                 fmuls   f3,f1,f6                ;f3 = m1*ox
  495.                 lfsu    f8,4(r6)                ;f8 = oz
  496.                 fmadds  f2,f4,f7,f2             ;f2 += m4*oy
  497.                 lfsu    f9,4(r6)                ;f9 = ow
  498.                 fmadds  f3,f5,f7,f3             ;f3 += m5*oy
  499.                 fmadds  f2,f12,f9,f2            ;f2 += m12*ow
  500.                 stfsu   f2,4(r4)                ;vEye[i][0]
  501.                 fmadds  f3,f13,f9,f3            ;f3 += m13*ow
  502.                 stfsu   f3,4(r4)                ;vEye[i][1]
  503.                 stfsu   f8,4(r4)                ;vEye[i][2] = oz
  504.                 stfsu   f9,4(r4)                ;vEye[i][3] = ow
  505.                 bdnz    .loop                   ;next point until CTR reaches 0
  506. .end
  507.                 blr
  508.  
  509.  
  510.  
  511.  
  512.  
  513. _asm_transform_points4_2d_no_rot
  514. ;   Scale x,y of n points by m0,m5 and add m12*w, m13*w; z and w pass through.
  515. ;            const GLfloat *m = ctx->ModelViewMatrix;
  516. ;            GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
  517. ;            GLuint i;
  518. ;            for (i=0;i<n;i++) {
  519. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1];
  520. ;               GLfloat oz = vObj[i][2], ow = vObj[i][3];
  521. ;               vEye[i][0] = m0 * ox                      + m12 * ow;
  522. ;               vEye[i][1] =           m5 * oy            + m13 * ow;
  523. ;               vEye[i][2] =                   +       oz           ;
  524. ;               vEye[i][3] =                                      ow;
  525. ;            }
  526. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer, r6 = input pointer.
  527.                 mr.     r3,r3                   ;set CR0 from n
  528.                 beq     .end                    ;n == 0: nothing to transform
  529.                 lfs     f0,0*4(r5)              ;f0 = m0
  530.                 lfs     f5,5*4(r5)              ;f5 = m5
  531.                 lfs     f12,12*4(r5)            ;f12 = m12
  532.                 lfs     f13,13*4(r5)            ;f13 = m13
  533.                 mtctr   r3                      ;CTR = n
  534.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  535.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu
  536. .loop
  537.                 lfsu    f6,4(r6)                ;f6 = ox
  538.                 lfsu    f7,4(r6)                ;f7 = oy
  539.                 fmuls   f2,f0,f6                ;f2 = m0*ox
  540.                 lfsu    f8,4(r6)                ;f8 = oz
  541.                 fmuls   f3,f5,f7                ;f3 = m5*oy
  542.                 lfsu    f9,4(r6)                ;f9 = ow
  543.                 fmadds  f2,f12,f9,f2            ;f2 += m12*ow
  544.                 stfsu   f2,4(r4)                ;vEye[i][0]
  545.                 fmadds  f3,f13,f9,f3            ;f3 += m13*ow
  546.                 stfsu   f3,4(r4)                ;vEye[i][1]
  547.                 stfsu   f8,4(r4)                ;vEye[i][2] = oz
  548.                 stfsu   f9,4(r4)                ;vEye[i][3] = ow
  549.                 bdnz    .loop                   ;next point until CTR reaches 0
  550. .end
  551.                 blr
  552.  
  553.  
  554.  
  555.  
  556.  
  557. _asm_transform_points4_3d
  558. ;   Transform n points (x,y,z,w) by the upper three matrix rows; w passes through.
  559. ;            const GLfloat *m = ctx->ModelViewMatrix;
  560. ;            GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
  561. ;            GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
  562. ;            GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
  563. ;            GLuint i;
  564. ;            for (i=0;i<n;i++) {
  565. ;               GLfloat ox = vObj[i][0], oy = vObj[i][1];
  566. ;               GLfloat oz = vObj[i][2], ow = vObj[i][3];
  567. ;               vEye[i][0] = m0 * ox + m4 * oy +  m8 * oz + m12 * ow;
  568. ;               vEye[i][1] = m1 * ox + m5 * oy +  m9 * oz + m13 * ow;
  569. ;               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
  570. ;               vEye[i][3] =                                      ow;
  571. ;            }
  572. ;   As used below: r3 = n, r4 = output pointer, r5 = matrix pointer, r6 = input pointer.
  573.                 stfd    f14,-1*8(r1)            ;save f14-f18 at negative offsets from r1 (r1 is not moved)
  574.                 stfd    f15,-2*8(r1)
  575.                 stfd    f16,-3*8(r1)
  576.                 stfd    f17,-4*8(r1)
  577.                 stfd    f18,-5*8(r1)
  578.                 mr.     r3,r3                   ;set CR0 from n
  579.                 beq     .end                    ;n == 0: only restore registers and return
  580.                 subi    r5,r5,4                 ;pre-bias: every lfsu adds 4 before loading
  581.                 lfsu    f0,4(r5)                ;f0 = m0
  582.                 lfsu    f1,4(r5)                ;f1 = m1
  583.                 lfsu    f2,4(r5)                ;f2 = m2
  584.                 addi    r5,r5,4                 ;skip m3
  585.                 lfsu    f4,4(r5)                ;f4 = m4
  586.                 lfsu    f5,4(r5)                ;f5 = m5
  587.                 lfsu    f6,4(r5)                ;f6 = m6
  588.                 addi    r5,r5,4                 ;skip m7
  589.                 lfsu    f8,4(r5)                ;f8 = m8
  590.                 lfsu    f9,4(r5)                ;f9 = m9
  591.                 lfsu    f10,4(r5)               ;f10 = m10
  592.                 addi    r5,r5,4                 ;skip m11
  593.                 lfsu    f12,4(r5)               ;f12 = m12
  594.                 lfsu    f13,4(r5)               ;f13 = m13
  595.                 lfsu    f14,4(r5)               ;f14 = m14
  596.                 mtctr   r3                      ;CTR = n
  597.                 subi    r6,r6,4                 ;pre-bias input pointer for lfsu
  598.                 subi    r4,r4,4                 ;pre-bias output pointer for stfsu
  599. .loop
  600.                 lfsu    f3,4(r6)                ;f3 = ox
  601.                 fmuls   f15,f0,f3               ;f15 = m0*ox
  602.                 fmuls   f16,f1,f3               ;f16 = m1*ox
  603.                 lfsu    f7,4(r6)                ;f7 = oy
  604.                 fmuls   f17,f2,f3               ;f17 = m2*ox
  605.                 fmadds  f15,f4,f7,f15           ;f15 += m4*oy
  606.                 lfsu    f11,4(r6)               ;f11 = oz
  607.                 fmadds  f16,f5,f7,f16           ;f16 += m5*oy
  608.                 fmadds  f17,f6,f7,f17           ;f17 += m6*oy
  609.                 fmadds  f15,f8,f11,f15          ;f15 += m8*oz
  610.                 lfsu    f18,4(r6)               ;f18 = ow
  611.                 fmadds  f16,f9,f11,f16          ;f16 += m9*oz
  612.                 fmadds  f17,f10,f11,f17         ;f17 += m10*oz
  613.                 fmadds  f15,f12,f18,f15         ;f15 += m12*ow
  614.                 stfsu   f15,4(r4)               ;vEye[i][0]
  615.                 fmadds  f16,f13,f18,f16         ;f16 += m13*ow
  616.                 stfsu   f16,4(r4)               ;vEye[i][1]
  617.                 fmadds  f17,f14,f18,f17         ;f17 += m14*ow
  618.                 stfsu   f17,4(r4)               ;vEye[i][2]
  619.                 stfsu   f18,4(r4)               ;vEye[i][3] = ow
  620.                 bdnz    .loop                   ;next point until CTR reaches 0
  621. .end
  622.                 lfd     f18,-5*8(r1)            ;restore f18-f14 from the slots written at entry
  623.                 lfd     f17,-4*8(r1)
  624.                 lfd     f16,-3*8(r1)
  625.                 lfd     f15,-2*8(r1)
  626.                 lfd     f14,-1*8(r1)
  627.                 blr
  628.  
  629.  
  630.  
  631.  
  632. _asm_project_and_cliptest_general
  633.  
  634. ;            const GLfloat *m = ctx->ProjectionMatrix;
  635. ;            GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
  636. ;            GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
  637. ;            GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
  638. ;            GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
  639. ;            GLuint i;
  640. ;            for (i=0;i<n;i++) {
  641. ;               GLfloat ex = vEye[i][0], ey = vEye[i][1];
  642. ;               GLfloat ez = vEye[i][2], ew = vEye[i][3];
  643. ;               GLfloat cx = m0 * ex + m4 * ey + m8  * ez + m12 * ew;
  644. ;               GLfloat cy = m1 * ex + m5 * ey + m9  * ez + m13 * ew;
  645. ;               GLfloat cz = m2 * ex + m6 * ey + m10 * ez + m14 * ew;
  646. ;               GLfloat cw = m3 * ex + m7 * ey + m11 * ez + m15 * ew;
  647. ;               GLubyte mask = 0;
  648. ;               vClip[i][0] = cx;
  649. ;               vClip[i][1] = cy;
  650. ;               vClip[i][2] = cz;
  651. ;               vClip[i][3] = cw;
  652. ;               if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
  653. ;               else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
  654. ;               if (cy >  cw)       mask |= CLIP_TOP_BIT;
  655. ;               else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
  656. ;               if (cz >  cw)       mask |= CLIP_FAR_BIT;
  657. ;               else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
  658. ;               if (mask) {
  659. ;                  clipMask[i] |= mask;
  660. ;                  tmpOrMask |= mask;
  661. ;               }
  662. ;               tmpAndMask &= mask;
  663. ;            }
  664.  
  665.  
  666.                 IFNE    1
  667.                 stfd    f14,-1*8(r1)
  668.                 stfd    f15,-2*8(r1)
  669.                 stfd    f16,-3*8(r1)
  670.                 stfd    f17,-4*8(r1)
  671.                 stfd    f18,-5*8(r1)
  672.                 stfd    f19,-6*8(r1)
  673.                 stfd    f20,-7*8(r1)
  674.                 stfd    f21,-8*8(r1)
  675.                 stfd    f22,-9*8(r1)
  676.                 stfd    f23,-10*8(r1)
  677.                 mr.     r3,r3
  678.                 beq     .end
  679.                 subi    r5,r5,4
  680.                 lfsu    f0,4(r5)
  681.                 lfsu    f1,4(r5)
  682.                 lfsu    f2,4(r5)
  683.                 lfsu    f3,4(r5)
  684.                 lfsu    f4,4(r5)
  685.                 lfsu    f5,4(r5)
  686.                 lfsu    f6,4(r5)
  687.                 lfsu    f7,4(r5)
  688.                 lfsu    f8,4(r5)
  689.                 lfsu    f9,4(r5)
  690.                 lfsu    f10,4(r5)
  691.                 lfsu    f11,4(r5)
  692.                 lfsu    f12,4(r5)
  693.                 lfsu    f13,4(r5)
  694.                 lfsu    f14,4(r5)
  695.                 lfsu    f15,4(r5)
  696.                 mtctr   r3
  697.                 subi    r6,r6,4
  698.                 subi    r4,r4,4
  699.                 lbz     r11,0(r8)
  700.                 lbz     r12,0(r9)
  701. .loop
  702.                 li      r10,0
  703.                 lfsu    f16,4(r6)               ;f16 = ex
  704.                 fmuls   f19,f0,f16
  705.                 fmuls   f20,f1,f16
  706.                 lfsu    f17,4(r6)               ;f17 = ey
  707.                 fmuls   f21,f2,f16
  708.                 fmuls   f22,f3,f16
  709.                 fmadds  f19,f4,f17,f19
  710.                 lfsu    f18,4(r6)               ;f18 = ez
  711.                 fmadds  f20,f5,f17,f20
  712.                 fmadds  f21,f6,f17,f21
  713.                 fmadds  f22,f7,f17,f22
  714.                 lfsu    f23,4(r6)               ;f23 = ew
  715.                 fmadds  f19,f8,f18,f19
  716.                 fmadds  f20,f9,f18,f20
  717.                 fmadds  f21,f10,f18,f21
  718.                 fmadds  f22,f11,f18,f22
  719.                 fmadds  f19,f12,f23,f19         ;f19 = cx
  720.                 stfsu   f19,4(r4)
  721.                 fmadds  f20,f13,f23,f20         ;f20 = cy
  722.                 stfsu   f20,4(r4)
  723.                 fmadds  f21,f14,f23,f21         ;f21 = cz
  724.                 stfsu   f21,4(r4)
  725.                 fmadds  f22,f15,f23,f22         ;f22 = cw
  726.                 stfsu   f22,4(r4)
  727.  
  728.                 fabs    f16,f19
  729.                 fabs    f17,f20
  730.                 fabs    f18,f21
  731.                 fsubs   f23,f16,f17
  732.                 fsel    f16,f23,f16,f17
  733.                 fsubs   f17,f16,f18
  734.                 fsel    f18,f17,f16,f18
  735.                 fcmpu   f18,f22
  736.                 bgt     .check
  737.                 li      r10,0
  738.                 b       .cont7
  739. .check
  740.  
  741.  
  742.  
  743.                 fneg    f16,f22                 ;f16 = -cw
  744.                 fcmpu   f19,f22
  745.                 ble     .cont1
  746.                 ori     r10,r10,CLIP_RIGHT_BIT
  747.                 b       .cont2
  748. .cont1
  749.                 fcmpu   f19,f16
  750.                 bge     .cont2
  751.                 ori     r10,r10,CLIP_LEFT_BIT
  752. .cont2
  753.                 fcmpu   f20,f22
  754.                 ble     .cont3
  755.                 ori     r10,r10,CLIP_TOP_BIT
  756.                 b       .cont4
  757. .cont3
  758.                 fcmpu   f20,f16
  759.                 bge     .cont4
  760.                 ori     r10,r10,CLIP_BOTTOM_BIT
  761. .cont4
  762.                 fcmpu   f21,f22
  763.                 ble     .cont5
  764.                 ori     r10,r10,CLIP_FAR_BIT
  765.                 b       .cont6
  766. .cont5
  767.                 fcmpu   f21,f16
  768.                 bge     .cont6
  769.                 ori     r10,r10,CLIP_NEAR_BIT
  770. .cont6
  771.                 mr.     r10,r10
  772.                 beq     .cont7
  773.                 lbz     r0,0(r7)
  774.                 or      r0,r0,r10
  775.                 stb     r0,0(r7)
  776.                 or      r11,r11,r10
  777. .cont7
  778.                 and     r12,r12,r10
  779.                 addi    r7,r7,1
  780.                 bdnz    .loop
  781.                 stb     r11,0(r8)
  782.                 stb     r12,0(r9)
  783. .end
  784.                 lfd     f23,-10*8(r1)
  785.                 lfd     f22,-9*8(r1)
  786.                 lfd     f21,-8*8(r1)
  787.                 lfd     f20,-7*8(r1)
  788.                 lfd     f19,-6*8(r1)
  789.                 lfd     f18,-5*8(r1)
  790.                 lfd     f17,-4*8(r1)
  791.                 lfd     f16,-3*8(r1)
  792.                 lfd     f15,-2*8(r1)
  793.                 lfd     f14,-1*8(r1)
  794.                 blr
  795.                 ELSEIF
  796.                 stfd    f14,-1*8(r1)
  797.                 stfd    f15,-2*8(r1)
  798.                 stfd    f16,-3*8(r1)
  799.                 stfd    f17,-4*8(r1)
  800.                 stfd    f18,-5*8(r1)
  801.                 stfd    f19,-6*8(r1)
  802.                 stfd    f20,-7*8(r1)
  803.                 stfd    f21,-8*8(r1)
  804.                 stfd    f22,-9*8(r1)
  805.                 stfd    f23,-10*8(r1)
  806.                 mr.     r3,r3
  807.                 beq     .end
  808.                 subi    r5,r5,4
  809.                 lfsu    f0,4(r5)
  810.                 lfsu    f1,4(r5)
  811.                 lfsu    f2,4(r5)
  812.                 lfsu    f3,4(r5)
  813.                 lfsu    f4,4(r5)
  814.                 lfsu    f5,4(r5)
  815.                 lfsu    f6,4(r5)
  816.                 lfsu    f7,4(r5)
  817.                 lfsu    f8,4(r5)
  818.                 lfsu    f9,4(r5)
  819.                 lfsu    f10,4(r5)
  820.                 lfsu    f11,4(r5)
  821.                 lfsu    f12,4(r5)
  822.                 lfsu    f13,4(r5)
  823.                 lfsu    f14,4(r5)
  824.                 lfsu    f15,4(r5)
  825.                 mtctr   r3
  826.                 subi    r6,r6,4
  827.                 subi    r4,r4,4
  828.                 lbz     r11,0(r8)
  829.                 lbz     r12,0(r9)
  830. .loop
  831.                 li      r10,0
  832.                 lfsu    f16,4(r6)               ;f16 = ex
  833.                 fmuls   f19,f0,f16
  834.                 fmuls   f20,f1,f16
  835.                 lfsu    f17,4(r6)               ;f17 = ey
  836.                 fmuls   f21,f2,f16
  837.                 fmuls   f22,f3,f16
  838.                 fmadds  f19,f4,f17,f19
  839.                 lfsu    f18,4(r6)               ;f18 = ez
  840.                 fmadds  f20,f5,f17,f20
  841.                 fmadds  f21,f6,f17,f21
  842.                 fmadds  f22,f7,f17,f22
  843.                 lfsu    f23,4(r6)               ;f23 = ew
  844.                 fmadds  f19,f8,f18,f19
  845.                 fmadds  f20,f9,f18,f20
  846.                 fmadds  f21,f10,f18,f21
  847.                 fmadds  f22,f11,f18,f22
  848.                 fmadds  f19,f12,f23,f19         ;f19 = cx
  849.                 stfsu   f19,4(r4)
  850.                 fmadds  f20,f13,f23,f20         ;f20 = cy
  851.                 stfsu   f20,4(r4)
  852.                 fmadds  f21,f14,f23,f21         ;f21 = cz
  853.                 stfsu   f21,4(r4)
  854.                 fmadds  f22,f15,f23,f22         ;f22 = cw
  855.                 stfsu   f22,4(r4)
  856.                 fneg    f16,f22                 ;f16 = -cw
  857.  
  858.                 fcmpu   f19,f22
  859.                 ble     .cont1
  860.                 ori     r10,r10,CLIP_RIGHT_BIT
  861.                 b       .cont2
  862. .cont1
  863.                 fcmpu   f19,f16
  864.                 bge     .cont2
  865.                 ori     r10,r10,CLIP_LEFT_BIT
  866. .cont2
  867.                 fcmpu   f20,f22
  868.                 ble     .cont3
  869.                 ori     r10,r10,CLIP_TOP_BIT
  870.                 b       .cont4
  871. .cont3
  872.                 fcmpu   f20,f16
  873.                 bge     .cont4
  874.                 ori     r10,r10,CLIP_BOTTOM_BIT
  875. .cont4
  876.                 fcmpu   f21,f22
  877.                 ble     .cont5
  878.                 ori     r10,r10,CLIP_FAR_BIT
  879.                 b       .cont6
  880. .cont5
  881.                 fcmpu   f21,f16
  882.                 bge     .cont6
  883.                 ori     r10,r10,CLIP_NEAR_BIT
  884. .cont6
  885.                 mr.     r10,r10
  886.                 beq     .cont7
  887.                 lbz     r0,0(r7)
  888.                 or      r0,r0,r10
  889.                 stb     r0,0(r7)
  890.                 or      r11,r11,r10
  891. .cont7
  892.                 and     r12,r12,r10
  893.                 addi    r7,r7,1
  894.                 bdnz    .loop
  895.                 stb     r11,0(r8)
  896.                 stb     r12,0(r9)
  897. .end
  898.                 lfd     f23,-10*8(r1)
  899.                 lfd     f22,-9*8(r1)
  900.                 lfd     f21,-8*8(r1)
  901.                 lfd     f20,-7*8(r1)
  902.                 lfd     f19,-6*8(r1)
  903.                 lfd     f18,-5*8(r1)
  904.                 lfd     f17,-4*8(r1)
  905.                 lfd     f16,-3*8(r1)
  906.                 lfd     f15,-2*8(r1)
  907.                 lfd     f14,-1*8(r1)
  908.                 blr
  909.                 ENDC
  910.  
  911.  
  912.  
  913.  
  914.  
  915. _asm_project_and_cliptest_identity
  916. ; Copies n eye-space vertices unchanged to clip space and clip-tests them.
       ;
       ; Register use (as read from the code below):
       ;   r3  = n, vertex count (moved to CTR; returns at once when zero)
       ;   r4  = clip-coordinate output, 4 floats stored per vertex
       ;   r5  = eye-coordinate input, 4 floats loaded per vertex
       ;   r6  = per-vertex clip mask bytes (OR-ed with the vertex mask when non-zero)
       ;   r7  = byte holding the running OR mask (tmpOrMask), loaded on entry,
       ;         stored back on exit
       ;   r8  = byte holding the running AND mask (tmpAndMask), loaded on entry,
       ;         stored back on exit
       ; Scratch: r0, r10-r12, f0-f4, f6-f8, f11, CTR and the condition flags.
       ;
       ; C reference:
  917. ;            GLuint i;
  918. ;            for (i=0;i<n;i++) {
  919. ;               GLfloat cx = vClip[i][0] = vEye[i][0];
  920. ;               GLfloat cy = vClip[i][1] = vEye[i][1];
  921. ;               GLfloat cz = vClip[i][2] = vEye[i][2];
  922. ;               GLfloat cw = vClip[i][3] = vEye[i][3];
  923. ;               GLubyte mask = 0;
  924. ;               if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
  925. ;               else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
  926. ;               if (cy >  cw)       mask |= CLIP_TOP_BIT;
  927. ;               else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
  928. ;               if (cz >  cw)       mask |= CLIP_FAR_BIT;
  929. ;               else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
  930. ;               if (mask) {
  931. ;                  clipMask[i] |= mask;
  932. ;                  tmpOrMask |= mask;
  933. ;               }
  934. ;               tmpAndMask &= mask;
  935. ;            }
  936. ; Variant with fast accept test (assembled, since the condition is 1)
  937.                 IFNE    1
  938.                 mr.     r3,r3                   ;n == 0 -> nothing to do
  939.                 beq     .end
  940.                 mtctr   r3                      ;CTR = n
  941.                 subi    r5,r5,4                 ;pre-bias for lfsu
  942.                 subi    r4,r4,4                 ;pre-bias for stfsu
  943.                 lbz     r11,0(r7)               ;r11 = running OR mask
  944.                 lbz     r12,0(r8)               ;r12 = running AND mask
  945. .loop
  946.                 li      r10,0                   ;r10 = mask of this vertex
  947.                 lfsu    f0,4(r5)                ;f0 = cx
  948.                 stfsu   f0,4(r4)
  949.                 lfsu    f1,4(r5)                ;f1 = cy
  950.                 stfsu   f1,4(r4)
  951.                 lfsu    f2,4(r5)                ;f2 = cz
  952.                 stfsu   f2,4(r4)
  953.                 lfsu    f3,4(r5)                ;f3 = cw
  954.                 stfsu   f3,4(r4)
  955. ; fast accept: max(|cx|,|cy|,|cz|) <= cw means no clip bit can be set
  956.                 fabs    f6,f0                   ;f6 = |cx|  (was f1: cx was never tested)
  957.                 fabs    f7,f1                   ;f7 = |cy|
  958.                 fabs    f8,f2                   ;f8 = |cz|
  959.                 fsubs   f11,f6,f7
  960.                 fsel    f6,f11,f6,f7            ;f6 = max(|cx|,|cy|)
  961.                 fsubs   f7,f6,f8
  962.                 fsel    f8,f7,f6,f8             ;f8 = max(|cx|,|cy|,|cz|)
  963.                 fcmpu   f8,f3
  964.                 bgt     .check                  ;outside somewhere -> full test
  965.                 li      r10,0                   ;mask stays 0
  966.                 b       .cont7
  967. .check
  968.  
  969.                 fneg    f4,f3                   ;f4 = -cw
  970.                 fcmpu   f0,f3
  971.                 ble     .cont1
  972.                 ori     r10,r10,CLIP_RIGHT_BIT
  973.                 b       .cont2
  974. .cont1
  975.                 fcmpu   f0,f4
  976.                 bge     .cont2
  977.                 ori     r10,r10,CLIP_LEFT_BIT
  978. .cont2
  979.                 fcmpu   f1,f3
  980.                 ble     .cont3
  981.                 ori     r10,r10,CLIP_TOP_BIT
  982.                 b       .cont4
  983. .cont3
  984.                 fcmpu   f1,f4
  985.                 bge     .cont4
  986.                 ori     r10,r10,CLIP_BOTTOM_BIT
  987. .cont4
  988.                 fcmpu   f2,f3
  989.                 ble     .cont5
  990.                 ori     r10,r10,CLIP_FAR_BIT
  991.                 b       .cont6
  992. .cont5
  993.                 fcmpu   f2,f4
  994.                 bge     .cont6
  995.                 ori     r10,r10,CLIP_NEAR_BIT
  996. .cont6
  997.                 mr.     r10,r10                 ;if (mask)
  998.                 beq     .cont7
  999.                 lbz     r0,0(r6)
  1000.                 or      r0,r0,r10
  1001.                 stb     r0,0(r6)                ;clipMask[i] |= mask
  1002.                 or      r11,r11,r10             ;tmpOrMask |= mask
  1003. .cont7
  1004.                 and     r12,r12,r10             ;tmpAndMask &= mask
  1005.                 addi    r6,r6,1                 ;next clipMask byte
  1006.                 bdnz    .loop
  1007.                 stb     r11,0(r7)               ;write back OR mask
  1008.                 stb     r12,0(r8)               ;write back AND mask
  1009. .end
  1010.                 blr
  1011.                 ELSEIF                          ;alternative without the fast accept test (presumably not assembled)
  1012.                 mr.     r3,r3
  1013.                 beq     .end
  1014.                 mtctr   r3
  1015.                 subi    r5,r5,4
  1016.                 subi    r4,r4,4
  1017.                 lbz     r11,0(r7)
  1018.                 lbz     r12,0(r8)
  1019. .loop
  1020.                 li      r10,0
  1021.                 lfsu    f0,4(r5)
  1022.                 stfsu   f0,4(r4)
  1023.                 lfsu    f1,4(r5)
  1024.                 stfsu   f1,4(r4)
  1025.                 lfsu    f2,4(r5)
  1026.                 stfsu   f2,4(r4)
  1027.                 lfsu    f3,4(r5)
  1028.                 stfsu   f3,4(r4)
  1029.                 fneg    f4,f3
  1030.                 fcmpu   f0,f3
  1031.                 ble     .cont1
  1032.                 ori     r10,r10,CLIP_RIGHT_BIT
  1033.                 b       .cont2
  1034. .cont1
  1035.                 fcmpu   f0,f4
  1036.                 bge     .cont2
  1037.                 ori     r10,r10,CLIP_LEFT_BIT
  1038. .cont2
  1039.                 fcmpu   f1,f3
  1040.                 ble     .cont3
  1041.                 ori     r10,r10,CLIP_TOP_BIT
  1042.                 b       .cont4
  1043. .cont3
  1044.                 fcmpu   f1,f4
  1045.                 bge     .cont4
  1046.                 ori     r10,r10,CLIP_BOTTOM_BIT
  1047. .cont4
  1048.                 fcmpu   f2,f3
  1049.                 ble     .cont5
  1050.                 ori     r10,r10,CLIP_FAR_BIT
  1051.                 b       .cont6
  1052. .cont5
  1053.                 fcmpu   f2,f4
  1054.                 bge     .cont6
  1055.                 ori     r10,r10,CLIP_NEAR_BIT
  1056. .cont6
  1057.                 mr.     r10,r10
  1058.                 beq     .cont7
  1059.                 lbz     r0,0(r6)
  1060.                 or      r0,r0,r10
  1061.                 stb     r0,0(r6)
  1062.                 or      r11,r11,r10
  1063. .cont7
  1064.                 and     r12,r12,r10
  1065.                 addi    r6,r6,1
  1066.                 bdnz    .loop
  1067.                 stb     r11,0(r7)
  1068.                 stb     r12,0(r8)
  1069. .end
  1070.                 blr
  1071.                 ENDC
  1072.  
  1073.  
  1074.  
  1075.  
  1076.  
  1077. _asm_project_and_cliptest_ortho
  1078. ; Projects n eye-space vertices with an orthographic-form matrix and clip-tests them.
        ;
        ; Register use (as read from the code below):
        ;   r3  = n, vertex count (moved to CTR; returns at once when zero)
        ;   r4  = clip-coordinate output, 4 floats stored per vertex
        ;   r5  = projection matrix m; only m[0], m[5], m[10], m[12], m[13]
        ;         and m[14] are read, once, before the loop
        ;   r6  = eye-coordinate input, 4 floats loaded per vertex
        ;   r7  = per-vertex clip mask bytes (OR-ed with the vertex mask when non-zero)
        ;   r8  = byte holding the running OR mask (tmpOrMask), loaded on entry,
        ;         stored back on exit
        ;   r9  = byte holding the running AND mask (tmpAndMask), loaded on entry,
        ;         stored back on exit
        ; Scratch: r0, r10-r12, f0-f13, CTR and the condition flags.
        ;
        ; C reference:
  1079. ;            const GLfloat *m = ctx->ProjectionMatrix;
  1080. ;            GLfloat m0 = m[0], m5 = m[5], m10 = m[10], m12 = m[12];
  1081. ;            GLfloat m13 = m[13], m14 = m[14];
  1082. ;            GLuint i;
  1083. ;            for (i=0;i<n;i++) {
  1084. ;               GLfloat ex = vEye[i][0], ey = vEye[i][1];
  1085. ;               GLfloat ez = vEye[i][2], ew = vEye[i][3];
  1086. ;               GLfloat cx = m0 * ex                      + m12 * ew;
  1087. ;               GLfloat cy =           m5 * ey            + m13 * ew;
  1088. ;               GLfloat cz =                     m10 * ez + m14 * ew;
  1089. ;               GLfloat cw =                                      ew;
  1090. ;               GLubyte mask = 0;
  1091. ;               vClip[i][0] = cx;
  1092. ;               vClip[i][1] = cy;
  1093. ;               vClip[i][2] = cz;
  1094. ;               vClip[i][3] = cw;
  1095. ;               if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
  1096. ;               else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
  1097. ;               if (cy >  cw)       mask |= CLIP_TOP_BIT;
  1098. ;               else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
  1099. ;               if (cz >  cw)       mask |= CLIP_FAR_BIT;
  1100. ;               else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
  1101. ;               if (mask) {
  1102. ;                  clipMask[i] |= mask;
  1103. ;                  tmpOrMask |= mask;
  1104. ;               }
  1105. ;               tmpAndMask &= mask;
  1106. ;            }
  1107. ; Variant with fast accept test (assembled, since the condition is 1)
  1108.                 IFNE    1
  1109.                 mr.     r3,r3                   ;n == 0 -> nothing to do
  1110.                 beq     .end
  1111.                 lfs     f0,0*4(r5)              ;f0 = m0
  1112.                 lfs     f5,5*4(r5)              ;f5 = m5
  1113.                 lfs     f10,10*4(r5)            ;f10 = m10
  1114.                 lfs     f12,12*4(r5)            ;f12 = m12
  1115.                 lfs     f13,13*4(r5)            ;f13 = m13
  1116.                 lfs     f4,14*4(r5)             ;f4 = m14
  1117.                 mtctr   r3                      ;CTR = n
  1118.                 subi    r6,r6,4                 ;pre-bias for lfsu
  1119.                 subi    r4,r4,4                 ;pre-bias for stfsu
  1120.                 lbz     r11,0(r8)               ;r11 = running OR mask
  1121.                 lbz     r12,0(r9)               ;r12 = running AND mask
  1122. .loop
  1123.                 li      r10,0                   ;r10 = mask of this vertex
  1124.                 lfsu    f6,4(r6)                ;f6 = ex
  1125.                 lfsu    f7,4(r6)                ;f7 = ey
  1126.                 fmuls   f1,f0,f6
  1127.                 lfsu    f8,4(r6)                ;f8 = ez
  1128.                 fmuls   f2,f5,f7
  1129.                 lfsu    f9,4(r6)                ;f9 = ew
  1130.                 fmuls   f3,f10,f8
  1131.                 fmadds  f1,f12,f9,f1            ;f1 = cx
  1132.                 stfsu   f1,4(r4)
  1133.                 fmadds  f2,f13,f9,f2            ;f2 = cy
  1134.                 stfsu   f2,4(r4)
  1135.                 fmadds  f3,f4,f9,f3             ;f3 = cz
  1136.                 stfsu   f3,4(r4)
  1137.                 stfsu   f9,4(r4)                ;f9 = cw
  1138. ; fast accept: max(|cx|,|cy|,|cz|) <= cw means no clip bit can be set
  1139.                 fabs    f6,f1                   ;f6 = |cx|
  1140.                 fabs    f7,f2                   ;f7 = |cy|
  1141.                 fabs    f8,f3                   ;f8 = |cz|
  1142.                 fsubs   f11,f6,f7
  1143.                 fsel    f6,f11,f6,f7            ;f6 = max(|cx|,|cy|)
  1144.                 fsubs   f7,f6,f8
  1145.                 fsel    f8,f7,f6,f8             ;f8 = max(|cx|,|cy|,|cz|)
  1146.                 fcmpu   f8,f9
  1147.                 bgt     .check                  ;outside somewhere -> full test
  1148.                 li      r10,0                   ;r10 is already 0 from the top of the loop
  1149.                 b       .cont7
  1150. .check
  1151.  
  1152.                 fneg    f11,f9                  ;f11 = -cw
  1153.                 fcmpu   f1,f9
  1154.                 ble     .cont1
  1155.                 ori     r10,r10,CLIP_RIGHT_BIT
  1156.                 b       .cont2
  1157. .cont1
  1158.                 fcmpu   f1,f11
  1159.                 bge     .cont2
  1160.                 ori     r10,r10,CLIP_LEFT_BIT
  1161. .cont2
  1162.                 fcmpu   f2,f9
  1163.                 ble     .cont3
  1164.                 ori     r10,r10,CLIP_TOP_BIT
  1165.                 b       .cont4
  1166. .cont3
  1167.                 fcmpu   f2,f11
  1168.                 bge     .cont4
  1169.                 ori     r10,r10,CLIP_BOTTOM_BIT
  1170. .cont4
  1171.                 fcmpu   f3,f9
  1172.                 ble     .cont5
  1173.                 ori     r10,r10,CLIP_FAR_BIT
  1174.                 b       .cont6
  1175. .cont5
  1176.                 fcmpu   f3,f11
  1177.                 bge     .cont6
  1178.                 ori     r10,r10,CLIP_NEAR_BIT
  1179. .cont6
  1180.                 mr.     r10,r10                 ;if (mask)
  1181.                 beq     .cont7
  1182.                 lbz     r0,0(r7)
  1183.                 or      r0,r0,r10
  1184.                 stb     r0,0(r7)                ;clipMask[i] |= mask
  1185.                 or      r11,r11,r10             ;tmpOrMask |= mask
  1186. .cont7
  1187.                 and     r12,r12,r10             ;tmpAndMask &= mask
  1188.                 addi    r7,r7,1                 ;next clipMask byte
  1189.                 bdnz    .loop
  1190.                 stb     r11,0(r8)               ;write back OR mask
  1191.                 stb     r12,0(r9)               ;write back AND mask
  1192. .end
  1193.                 blr
  1194.                 ELSEIF                          ;alternative without the fast accept test (presumably not assembled)
  1195.                 mr.     r3,r3
  1196.                 beq     .end
  1197.                 lfs     f0,0*4(r5)
  1198.                 lfs     f5,5*4(r5)
  1199.                 lfs     f10,10*4(r5)
  1200.                 lfs     f12,12*4(r5)
  1201.                 lfs     f13,13*4(r5)
  1202.                 lfs     f4,14*4(r5)
  1203.                 mtctr   r3
  1204.                 subi    r6,r6,4
  1205.                 subi    r4,r4,4
  1206.                 lbz     r11,0(r8)
  1207.                 lbz     r12,0(r9)
  1208. .loop
  1209.                 li      r10,0
  1210.                 lfsu    f6,4(r6)                ;f6 = ex
  1211.                 lfsu    f7,4(r6)                ;f7 = ey
  1212.                 fmuls   f1,f0,f6
  1213.                 lfsu    f8,4(r6)                ;f8 = ez
  1214.                 fmuls   f2,f5,f7
  1215.                 lfsu    f9,4(r6)                ;f9 = ew
  1216.                 fmuls   f3,f10,f8
  1217.                 fmadds  f1,f12,f9,f1            ;f1 = cx
  1218.                 stfsu   f1,4(r4)
  1219.                 fmadds  f2,f13,f9,f2            ;f2 = cy
  1220.                 stfsu   f2,4(r4)
  1221.                 fmadds  f3,f4,f9,f3             ;f3 = cz
  1222.                 stfsu   f3,4(r4)
  1223.                 stfsu   f9,4(r4)                ;f9 = cw
  1224.                 fneg    f11,f9                  ;f11 = -cw
  1225.  
  1226.                 fcmpu   f1,f9
  1227.                 ble     .cont1
  1228.                 ori     r10,r10,CLIP_RIGHT_BIT
  1229.                 b       .cont2
  1230. .cont1
  1231.                 fcmpu   f1,f11
  1232.                 bge     .cont2
  1233.                 ori     r10,r10,CLIP_LEFT_BIT
  1234. .cont2
  1235.                 fcmpu   f2,f9
  1236.                 ble     .cont3
  1237.                 ori     r10,r10,CLIP_TOP_BIT
  1238.                 b       .cont4
  1239. .cont3
  1240.                 fcmpu   f2,f11
  1241.                 bge     .cont4
  1242.                 ori     r10,r10,CLIP_BOTTOM_BIT
  1243. .cont4
  1244.                 fcmpu   f3,f9
  1245.                 ble     .cont5
  1246.                 ori     r10,r10,CLIP_FAR_BIT
  1247.                 b       .cont6
  1248. .cont5
  1249.                 fcmpu   f3,f11
  1250.                 bge     .cont6
  1251.                 ori     r10,r10,CLIP_NEAR_BIT
  1252. .cont6
  1253.                 mr.     r10,r10
  1254.                 beq     .cont7
  1255.                 lbz     r0,0(r7)
  1256.                 or      r0,r0,r10
  1257.                 stb     r0,0(r7)
  1258.                 or      r11,r11,r10
  1259. .cont7
  1260.                 and     r12,r12,r10
  1261.                 addi    r7,r7,1
  1262.                 bdnz    .loop
  1263.                 stb     r11,0(r8)
  1264.                 stb     r12,0(r9)
  1265. .end
  1266.                 blr
  1267.                 ENDC
  1268.  
  1269. _asm_project_and_cliptest_perspective
  1270. ; Projects n eye-space vertices with a perspective-form matrix and clip-tests them.
        ;
        ; Register use (as read from the code below):
        ;   r3  = n, vertex count (moved to CTR; returns at once when zero)
        ;   r4  = clip-coordinate output, 4 floats stored per vertex
        ;   r5  = projection matrix m; only m[0], m[5], m[8], m[9], m[10]
        ;         and m[14] are read, once, before the loop
        ;   r6  = eye-coordinate input, 4 floats loaded per vertex
        ;   r7  = per-vertex clip mask bytes (OR-ed with the vertex mask when non-zero)
        ;   r8  = byte holding the running OR mask (tmpOrMask), loaded on entry,
        ;         stored back on exit
        ;   r9  = byte holding the running AND mask (tmpAndMask), loaded on entry,
        ;         stored back on exit
        ; Scratch: r0, r10-r12, f0-f13, CTR and the condition flags.
        ;
        ; C reference:
  1271. ;            const GLfloat *m = ctx->ProjectionMatrix;
  1272. ;            GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
  1273. ;            GLfloat m10 = m[10], m14 = m[14];
  1274. ;            GLuint i;
  1275. ;            for (i=0;i<n;i++) {
  1276. ;               GLfloat ex = vEye[i][0], ey = vEye[i][1];
  1277. ;               GLfloat ez = vEye[i][2], ew = vEye[i][3];
  1278. ;               GLfloat cx = m0 * ex           + m8  * ez           ;
  1279. ;               GLfloat cy =           m5 * ey + m9  * ez           ;
  1280. ;               GLfloat cz =                     m10 * ez + m14 * ew;
  1281. ;               GLfloat cw =                          -ez           ;
  1282. ;               GLubyte mask = 0;
  1283. ;               vClip[i][0] = cx;
  1284. ;               vClip[i][1] = cy;
  1285. ;               vClip[i][2] = cz;
  1286. ;               vClip[i][3] = cw;
  1287. ;               if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
  1288. ;               else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
  1289. ;               if (cy >  cw)       mask |= CLIP_TOP_BIT;
  1290. ;               else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
  1291. ;               if (cz >  cw)       mask |= CLIP_FAR_BIT;
  1292. ;               else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
  1293. ;               if (mask) {
  1294. ;                  clipMask[i] |= mask;
  1295. ;                  tmpOrMask |= mask;
  1296. ;               }
  1297. ;               tmpAndMask &= mask;
  1298. ;            }
        ; Variant with fast accept test (assembled, since the condition is 1)
  1299.                 IFNE    1
  1300.                 mr.     r3,r3                   ;n == 0 -> nothing to do
  1301.                 beq     .end
  1302.                 lfs     f0,0*4(r5)              ;f0 = m0
  1303.                 lfs     f5,5*4(r5)              ;f5 = m5
  1304.                 lfs     f8,8*4(r5)              ;f8 = m8
  1305.                 lfs     f9,9*4(r5)              ;f9 = m9
  1306.                 lfs     f10,10*4(r5)            ;f10 = m10
  1307.                 lfs     f4,14*4(r5)             ;f4 = m14
  1308.                 mtctr   r3                      ;CTR = n
  1309.                 subi    r6,r6,4                 ;pre-bias for lfsu
  1310.                 subi    r4,r4,4                 ;pre-bias for stfsu
  1311.                 lbz     r11,0(r8)               ;r11 = running OR mask
  1312.                 lbz     r12,0(r9)               ;r12 = running AND mask
  1313. .loop
  1314.                 li      r10,0                   ;r10 = mask of this vertex
  1315.                 lfsu    f6,4(r6)                ;f6 = ex
  1316.                 lfsu    f7,4(r6)                ;f7 = ey
  1317.                 fmuls   f1,f0,f6
  1318.                 lfsu    f12,4(r6)               ;f12 = ez
  1319.                 fmuls   f2,f5,f7
  1320.                 lfsu    f13,4(r6)               ;f13 = ew
  1321.                 fmuls   f3,f10,f12
  1322.                 fmadds  f1,f8,f12,f1            ;f1 = cx
  1323.                 stfsu   f1,4(r4)
  1324.                 fmadds  f2,f9,f12,f2            ;f2 = cy
  1325.                 stfsu   f2,4(r4)
  1326.                 fmadds  f3,f4,f13,f3            ;f3 = cz
  1327.                 stfsu   f3,4(r4)
  1328.                 fneg    f13,f12                 ;f13 = cw
  1329.                 stfsu   f13,4(r4)
  1330. ; fast accept: max(|cx|,|cy|,|cz|) <= cw means no clip bit can be set
  1331.                 fabs    f6,f1                   ;f6 = |cx|
  1332.                 fabs    f7,f2                   ;f7 = |cy|
  1333.                 fabs    f12,f3                  ;f12 = |cz| (ez is no longer needed)
  1334.                 fsubs   f11,f6,f7
  1335.                 fsel    f6,f11,f6,f7            ;f6 = max(|cx|,|cy|)
  1336.                 fsubs   f7,f6,f12
  1337.                 fsel    f12,f7,f6,f12           ;f12 = max(|cx|,|cy|,|cz|)
  1338.                 fcmpu   f12,f13
  1339.                 bgt     .check                  ;outside somewhere -> full test
  1340.                 li      r10,0                   ;r10 is already 0 from the top of the loop
  1341.                 b       .cont7
  1342. .check
  1343.  
  1344.  
  1345.                 fneg    f11,f13                 ;f11 = -cw
  1346.                 fcmpu   f1,f13
  1347.                 ble     .cont1
  1348.                 ori     r10,r10,CLIP_RIGHT_BIT
  1349.                 b       .cont2
  1350. .cont1
  1351.                 fcmpu   f1,f11
  1352.                 bge     .cont2
  1353.                 ori     r10,r10,CLIP_LEFT_BIT
  1354. .cont2
  1355.                 fcmpu   f2,f13
  1356.                 ble     .cont3
  1357.                 ori     r10,r10,CLIP_TOP_BIT
  1358.                 b       .cont4
  1359. .cont3
  1360.                 fcmpu   f2,f11
  1361.                 bge     .cont4
  1362.                 ori     r10,r10,CLIP_BOTTOM_BIT
  1363. .cont4
  1364.                 fcmpu   f3,f13
  1365.                 ble     .cont5
  1366.                 ori     r10,r10,CLIP_FAR_BIT
  1367.                 b       .cont6
  1368. .cont5
  1369.                 fcmpu   f3,f11
  1370.                 bge     .cont6
  1371.                 ori     r10,r10,CLIP_NEAR_BIT
  1372. .cont6
  1373.                 mr.     r10,r10                 ;if (mask)
  1374.                 beq     .cont7
  1375.                 lbz     r0,0(r7)
  1376.                 or      r0,r0,r10
  1377.                 stb     r0,0(r7)                ;clipMask[i] |= mask
  1378.                 or      r11,r11,r10             ;tmpOrMask |= mask
  1379. .cont7
  1380.                 and     r12,r12,r10             ;tmpAndMask &= mask
  1381.                 addi    r7,r7,1                 ;next clipMask byte
  1382.                 bdnz    .loop
  1383.                 stb     r11,0(r8)               ;write back OR mask
  1384.                 stb     r12,0(r9)               ;write back AND mask
  1385. .end
  1386.                 blr
  1387.                 ELSEIF                          ;alternative without the fast accept test (presumably not assembled)
  1388.                 mr.     r3,r3
  1389.                 beq     .end
  1390.                 lfs     f0,0*4(r5)
  1391.                 lfs     f5,5*4(r5)
  1392.                 lfs     f8,8*4(r5)
  1393.                 lfs     f9,9*4(r5)
  1394.                 lfs     f10,10*4(r5)
  1395.                 lfs     f4,14*4(r5)
  1396.                 mtctr   r3
  1397.                 subi    r6,r6,4
  1398.                 subi    r4,r4,4
  1399.                 lbz     r11,0(r8)
  1400.                 lbz     r12,0(r9)
  1401. .loop
  1402.                 li      r10,0
  1403.                 lfsu    f6,4(r6)                ;f6 = ex
  1404.                 lfsu    f7,4(r6)                ;f7 = ey
  1405.                 fmuls   f1,f0,f6
  1406.                 lfsu    f12,4(r6)               ;f12 = ez
  1407.                 fmuls   f2,f5,f7
  1408.                 lfsu    f13,4(r6)               ;f13 = ew
  1409.                 fmuls   f3,f10,f12
  1410.                 fmadds  f1,f8,f12,f1            ;f1 = cx
  1411.                 stfsu   f1,4(r4)
  1412.                 fmadds  f2,f9,f12,f2            ;f2 = cy
  1413.                 stfsu   f2,4(r4)
  1414.                 fmadds  f3,f4,f13,f3            ;f3 = cz
  1415.                 stfsu   f3,4(r4)
  1416.                 fneg    f13,f12                 ;f13 = cw
  1417.                 stfsu   f13,4(r4)
  1418.                 fneg    f11,f13                 ;f11 = -cw
  1419.                 fcmpu   f1,f13
  1420.                 ble     .cont1
  1421.                 ori     r10,r10,CLIP_RIGHT_BIT
  1422.                 b       .cont2
  1423. .cont1
  1424.                 fcmpu   f1,f11
  1425.                 bge     .cont2
  1426.                 ori     r10,r10,CLIP_LEFT_BIT
  1427. .cont2
  1428.                 fcmpu   f2,f13
  1429.                 ble     .cont3
  1430.                 ori     r10,r10,CLIP_TOP_BIT
  1431.                 b       .cont4
  1432. .cont3
  1433.                 fcmpu   f2,f11
  1434.                 bge     .cont4
  1435.                 ori     r10,r10,CLIP_BOTTOM_BIT
  1436. .cont4
  1437.                 fcmpu   f3,f13
  1438.                 ble     .cont5
  1439.                 ori     r10,r10,CLIP_FAR_BIT
  1440.                 b       .cont6
  1441. .cont5
  1442.                 fcmpu   f3,f11
  1443.                 bge     .cont6
  1444.                 ori     r10,r10,CLIP_NEAR_BIT
  1445. .cont6
  1446.                 mr.     r10,r10
  1447.                 beq     .cont7
  1448.                 lbz     r0,0(r7)
  1449.                 or      r0,r0,r10
  1450.                 stb     r0,0(r7)
  1451.                 or      r11,r11,r10
  1452. .cont7
  1453.                 and     r12,r12,r10
  1454.                 addi    r7,r7,1
  1455.                 bdnz    .loop
  1456.                 stb     r11,0(r8)
  1457.                 stb     r12,0(r9)
  1458. .end
  1459.                 blr
  1460.                 ENDC
  1461.  
  1462. _asm_vp_map_vertices_now
  1463. ; Maps n clip-space vertices to window coordinates without a divide by w.
        ;
        ; Register use (as read from the code below):
        ;   r3  = clip mask bytes, one per vertex; a null pointer selects the
        ;         unmasked loop
        ;   r4  = n, vertex count (moved to CTR; returns at once when zero)
        ;   r5  = window-coordinate output, 3 floats (12 bytes) per vertex
        ;   r6  = clip-coordinate input, 4 floats (16 bytes) per vertex; only
        ;         the first three are read
        ;   f1,f2,f3 = scale factors sx, sy, sz
        ;   f4,f5,f6 = offsets tx, ty, tz
        ; Scratch: r0, f7-f12, CTR and the condition flags.
        ;
        ; C reference:
  1464. ;      if (clipMask) {
  1465. ;         /* one or more vertices are clipped */
  1466. ;         GLuint i;
  1467. ;         for (i=0;i<n;i++) {
  1468. ;            if (clipMask[i]==0) {
  1469. ;               vWin[i][0] = vClip[i][0] * sx + tx;
  1470. ;               vWin[i][1] = vClip[i][1] * sy + ty;
  1471. ;               vWin[i][2] = vClip[i][2] * sz + tz;
  1472. ;            }
  1473. ;         }
  1474. ;      }
  1475. ;      else {
  1476. ;         /* no vertices are clipped */
  1477. ;         GLuint i;
  1478. ;         for (i=0;i<n;i++) {
  1479. ;            vWin[i][0] = vClip[i][0] * sx + tx;
  1480. ;            vWin[i][1] = vClip[i][1] * sy + ty;
  1481. ;            vWin[i][2] = vClip[i][2] * sz + tz;
  1482. ;         }
  1483. ;      }
  1484.  
  1485.                 mr.     r4,r4                   ;n == 0 -> nothing to do
  1486.                 beq     .exit
  1487.                 mtctr   r4                      ;CTR = n
  1488.                 subi    r5,r5,4                 ;r5 = output - 4 (stores use +4/+8/+12)
  1489.                 mr.     r3,r3                   ;no clip mask array?
  1490.                 beq     .plain
  1491.                 subi    r3,r3,1                 ;pre-bias for lbzu
  1492. ; --- masked loop: vertices with a non-zero mask byte are left untouched
  1493. .masked
  1494.                 lbzu    r0,1(r3)                ;r0 = clipMask[i]
  1495.                 mr.     r0,r0
  1496.                 bne     .skip
  1497.                 lfs     f7,0(r6)                ;f7 = x
  1498.                 lfs     f8,4(r6)                ;f8 = y
  1499.                 lfs     f9,8(r6)                ;f9 = z
  1500.                 fmadds  f10,f1,f7,f4            ;f10 = x*sx + tx
  1501.                 fmadds  f11,f2,f8,f5            ;f11 = y*sy + ty
  1502.                 fmadds  f12,f3,f9,f6            ;f12 = z*sz + tz
  1503.                 stfs    f10,4(r5)
  1504.                 stfs    f11,8(r5)
  1505.                 stfs    f12,12(r5)
  1506. .skip
  1507.                 addi    r5,r5,12                ;next output vertex
  1508.                 addi    r6,r6,16                ;next input vertex
  1509.                 bdnz    .masked
  1510.                 blr
  1511. ; --- unmasked loop: every vertex is mapped
  1512. .plain
  1513.                 lfs     f7,0(r6)                ;f7 = x
  1514.                 lfs     f8,4(r6)                ;f8 = y
  1515.                 lfs     f9,8(r6)                ;f9 = z
  1516.                 addi    r6,r6,16                ;next input vertex
  1517.                 fmadds  f10,f1,f7,f4            ;f10 = x*sx + tx
  1518.                 fmadds  f11,f2,f8,f5            ;f11 = y*sy + ty
  1519.                 fmadds  f12,f3,f9,f6            ;f12 = z*sz + tz
  1520.                 stfs    f10,4(r5)
  1521.                 stfs    f11,8(r5)
  1522.                 stfsu   f12,12(r5)              ;last store also advances r5 by 12
  1523.                 bdnz    .plain
  1524. .exit
  1525.                 blr
  1526.  
  1527. _asm_vp_map_vertices
        ;
        ; Maps n clip-space vertices to window coordinates. The commented C
        ; code below is the reference this routine follows.
        ;
        ; Register usage, as read from the instructions below:
        ;   r3     clipMask, one byte per vertex; 0 (NULL) selects the
        ;          unmasked path at .loop2
        ;   r4     n, number of vertices (loop counter, counted down to 0)
        ;   r5     vWin, output, 3 floats (12 bytes) per vertex
        ;   r6     vClip, input, 4 floats (16 bytes) per vertex
        ;   f1-f3  sx, sy, sz (scale factors)
        ;   f4-f6  tx, ty, tz (translations)
        ;   r2     base register used to address fp_0 and fp_2
        ; Scratch: r0, f0, f7-f13.  f26-f31 are saved on entry and restored
        ; at .done.
        ;
        ; NOTE(review): f26-f31 are stored below r1 without moving r1; this
        ; presumably relies on the ABI allowing that in a leaf routine - confirm.
  1528.  
  1529. ;      if (clipMask) {
  1530. ;         /* one or more vertices are clipped */
  1531. ;         GLuint i;
  1532. ;         for (i=0;i<n;i++) {
  1533. ;            if (clipMask[i] == 0) {
  1534. ;               if (vClip[i][3] != 0.0F) {
  1535. ;                  GLfloat wInv = 1.0F / vClip[i][3];
  1536. ;                  vWin[i][0] = vClip[i][0] * wInv * sx + tx;
  1537. ;                  vWin[i][1] = vClip[i][1] * wInv * sy + ty;
  1538. ;                  vWin[i][2] = vClip[i][2] * wInv * sz + tz;
  1539. ;               }
  1540. ;               else {
  1541. ;                  /* Div by zero!  Can't set window coords to infinity, so...*/
  1542. ;                  vWin[i][0] = 0.0F;
  1543. ;                  vWin[i][1] = 0.0F;
  1544. ;                  vWin[i][2] = 0.0F;
  1545. ;               }
  1546. ;            }
  1547. ;         }
  1548. ;      }
  1549. ;      else {
  1550. ;         /* no vertices are clipped */
  1551. ;         GLuint i;
  1552. ;         for (i=0;i<n;i++) {
  1553. ;            if (vClip[i][3] != 0.0F) {
  1554. ;               GLfloat wInv = 1.0F / vClip[i][3];
  1555. ;               vWin[i][0] = vClip[i][0] * wInv * sx + tx;
  1556. ;               vWin[i][1] = vClip[i][1] * wInv * sy + ty;
  1557. ;               vWin[i][2] = vClip[i][2] * wInv * sz + tz;
  1558. ;            }
  1559. ;            else {
  1560. ;               /* Divide by zero!  Can't set window coords to infinity, so...*/
  1561. ;               vWin[i][0] = 0.0F;
  1562. ;               vWin[i][1] = 0.0F;
  1563. ;               vWin[i][2] = 0.0F;
  1564. ;            }
  1565. ;         }
  1566. ;      }
  1567.  
  1568.                 stfd    f31,-8(r1)              ; save f26-f31 below r1
  1569.                 stfd    f30,-16(r1)
  1570.                 stfd    f29,-24(r1)
  1571.                 stfd    f28,-32(r1)
  1572.                 stfd    f27,-40(r1)
  1573.                 stfd    f26,-48(r1)
  1574.                 mr.     r4,r4                   ; n == 0 ?
  1575.                 beq     .done                   ; yes: nothing to map
  1576.                 subi    r5,r5,4                 ; bias vWin so every store can be stfsu/stwu ..,4(r5)
  1577.                 mr.     r3,r3                   ; clipMask == NULL ? (tested by the beq below)
  1578.                 lfs     f12,fp_0(r2)            ; f12 = fp_0 constant, used as the zero to compare w against
  1579.                 lfs     f11,fp_2(r2)            ; f11 = fp_2 constant, the "2" of the refinement steps
  1580.                 beq     .loop2                  ; no clip mask: take the unmasked path
        ; ---- masked path: one vertex per iteration ----
  1581. .loop1
  1582.                 lbz     r0,0(r3)                ; r0 = clipMask[i]
  1583.                 mr.     r0,r0
  1584.                 bne     .inc                    ; non-zero mask: vWin[i] is left unwritten
  1585.                 lfs     f13,12(r6)              ; f13 = w = vClip[i][3]
  1586.                 fcmpu   f13,f12                 ; w == 0 ?
  1587.                 beq     .zero
        ; NOTE(review): wInv here is the raw fres estimate with no refinement,
        ; whereas the quad path below refines its estimate three times.
  1588.                 fres    f13,f13                 ; f13 = wInv (reciprocal estimate of w)
  1589.                 lfs     f7,0(r6)                ; x
  1590.                 fmuls   f7,f7,f13               ; x * wInv
  1591.                 lfs     f8,4(r6)                ; y
  1592.                 fmadds  f7,f1,f7,f4             ; sx * (x*wInv) + tx
  1593.                 fmuls   f8,f8,f13               ; y * wInv
  1594.                 lfs     f9,8(r6)                ; z
  1595.                 fmadds  f8,f2,f8,f5             ; sy * (y*wInv) + ty
  1596.                 stfsu   f7,4(r5)                ; vWin[i][0]; r5 += 4
  1597.                 fmuls   f9,f9,f13               ; z * wInv
  1598.                 stfsu   f8,4(r5)                ; vWin[i][1]; r5 += 4
  1599.                 fmadds  f9,f3,f9,f6             ; sz * (z*wInv) + tz
  1600.                 stfsu   f9,4(r5)                ; vWin[i][2]; r5 += 4
  1601.                 b       .next
  1602. .zero
  1603.                 li      r0,0                    ; w == 0: store three all-zero words
  1604.                 stwu    r0,4(r5)
  1605.                 stwu    r0,4(r5)
  1606.                 stwu    r0,4(r5)
  1607.                 b       .next
  1608. .inc
  1609.                 addi    r5,r5,12                ; step over the three output floats without writing
  1610. .next
  1611.                 addi    r6,r6,16                ; next vClip entry
  1612.                 addi    r3,r3,1                 ; next clipMask byte
  1613.                 subic.  r4,r4,1                 ; one vertex done
  1614.                 bne     .loop1
  1615.                 b       .done
        ; ---- unmasked path: four vertices at a time while at least 4 remain ----
  1616. .loop2
  1617.  
  1618.                 cmplwi  r4,4                    ; fewer than 4 vertices left ?
  1619.                 blt     .normal                 ; yes: handle them one by one
  1620.  
  1621. /* quad parallel turbo division */
  1622.                 lfs     f8,12(r6)               ; w0
  1623.                 lfs     f9,12+16(r6)            ; w1
  1624.                 lfs     f10,12+2*16(r6)         ; w2
  1625.                 lfs     f13,12+3*16(r6)         ; w3
  1626.  
        ; Seed each reciprocal with frsqrte(w*w), i.e. an estimate of 1/|w|.
        ; NOTE(review): the seed is therefore never negative, and the refinement
        ; est*(2 - w*est) used below only approaches 1/w for w > 0. Presumably
        ; w > 0 is guaranteed when nothing is clipped - confirm with the caller.
        ; NOTE(review): when w*w compares equal to zero the seed stays 0, so
        ; wInv ends up 0 and (for finite x,y,z) the vertex maps to tx,ty,tz,
        ; not to the 0,0,0 that the C reference and .zero2 store.
  1627.                 fmuls   f0,f8,f8                ; w0*w0
  1628.                 fmuls   f31,f9,f9               ; w1*w1
  1629.                 fmuls   f29,f10,f10             ; w2*w2
  1630.                 fmuls   f27,f13,f13             ; w3*w3
  1631.                 fcmpu   f0,f12
  1632.                 beq     .skip1                  ; w0*w0 == 0: keep 0 as seed
  1633.                 frsqrte f0,f0                   ; est0
  1634. .skip1
  1635.                 fcmpu   f31,f12
  1636.                 beq     .skip2
  1637.                 frsqrte f31,f31                 ; est1
  1638. .skip2
  1639.                 fcmpu   f29,f12
  1640.                 beq     .skip3
  1641.                 frsqrte f29,f29                 ; est2
  1642. .skip3
  1643.                 fcmpu   f27,f12
  1644.                 beq     .skip4
  1645.                 frsqrte f27,f27                 ; est3
  1646. .skip4
        ; Three refinement steps per lane: est = est * (2 - est*w).
        ; fnmsubs d,a,c,b computes -(a*c - b), i.e. f11 - est*w.
  1647.                 fnmsubs f7,f0,f8,f11            ; step 1: 2 - est*w
  1648.                 fnmsubs f30,f31,f9,f11
  1649.                 fnmsubs f28,f29,f10,f11
  1650.                 fnmsubs f26,f27,f13,f11
  1651.                 fmuls   f0,f0,f7                ; step 1: est *= (2 - est*w)
  1652.                 fmuls   f31,f31,f30
  1653.                 fmuls   f29,f29,f28
  1654.                 fmuls   f27,f27,f26
  1655.                 fnmsubs f7,f0,f8,f11            ; step 2
  1656.                 fnmsubs f30,f31,f9,f11
  1657.                 fnmsubs f28,f29,f10,f11
  1658.                 fnmsubs f26,f27,f13,f11
  1659.                 fmuls   f0,f0,f7
  1660.                 fmuls   f31,f31,f30
  1661.                 fmuls   f29,f29,f28
  1662.                 fmuls   f27,f27,f26
  1663.                 fnmsubs f7,f0,f8,f11            ; step 3
  1664.                 fnmsubs f30,f31,f9,f11
  1665.                 fnmsubs f28,f29,f10,f11
  1666.                 fnmsubs f26,f27,f13,f11
  1667.                 fmuls   f0,f0,f7                ; f0  = wInv0
  1668.                 fmuls   f31,f31,f30             ; f31 = wInv1
  1669.                 fmuls   f29,f29,f28             ; f29 = wInv2
  1670.                 fmuls   f27,f27,f26             ; f27 = wInv3
  1671.  
        ; vertex 0 of the group: vWin = vClip.xyz * wInv0 * s + t
  1672.                 lfs     f7,0(r6)
  1673.                 fmuls   f7,f7,f0
  1674.                 lfs     f8,4(r6)
  1675.                 fmadds  f7,f1,f7,f4
  1676.                 fmuls   f8,f8,f0
  1677.                 lfs     f9,8(r6)
  1678.                 fmadds  f8,f2,f8,f5
  1679.                 stfsu   f7,4(r5)
  1680.                 fmuls   f9,f9,f0
  1681.                 stfsu   f8,4(r5)
  1682.                 fmadds  f9,f3,f9,f6
  1683.                 addi    r6,r6,16                ; advance to the next vClip entry
  1684.                 stfsu   f9,4(r5)
  1685.  
        ; vertex 1 of the group (wInv1 in f31)
  1686.                 lfs     f7,0(r6)
  1687.                 fmuls   f7,f7,f31
  1688.                 lfs     f8,4(r6)
  1689.                 fmadds  f7,f1,f7,f4
  1690.                 fmuls   f8,f8,f31
  1691.                 lfs     f9,8(r6)
  1692.                 fmadds  f8,f2,f8,f5
  1693.                 stfsu   f7,4(r5)
  1694.                 fmuls   f9,f9,f31
  1695.                 stfsu   f8,4(r5)
  1696.                 fmadds  f9,f3,f9,f6
  1697.                 addi    r6,r6,16
  1698.                 stfsu   f9,4(r5)
  1699.  
        ; vertex 2 of the group (wInv2 in f29)
  1700.                 lfs     f7,0(r6)
  1701.                 fmuls   f7,f7,f29
  1702.                 lfs     f8,4(r6)
  1703.                 fmadds  f7,f1,f7,f4
  1704.                 fmuls   f8,f8,f29
  1705.                 lfs     f9,8(r6)
  1706.                 fmadds  f8,f2,f8,f5
  1707.                 stfsu   f7,4(r5)
  1708.                 fmuls   f9,f9,f29
  1709.                 stfsu   f8,4(r5)
  1710.                 fmadds  f9,f3,f9,f6
  1711.                 addi    r6,r6,16
  1712.                 stfsu   f9,4(r5)
  1713.  
        ; vertex 3 of the group (wInv3 in f27)
  1714.                 lfs     f7,0(r6)
  1715.                 fmuls   f7,f7,f27
  1716.                 lfs     f8,4(r6)
  1717.                 fmadds  f7,f1,f7,f4
  1718.                 fmuls   f8,f8,f27
  1719.                 lfs     f9,8(r6)
  1720.                 fmadds  f8,f2,f8,f5
  1721.                 stfsu   f7,4(r5)
  1722.                 fmuls   f9,f9,f27
  1723.                 stfsu   f8,4(r5)
  1724.                 fmadds  f9,f3,f9,f6
  1725.                 addi    r6,r6,16
  1726.                 stfsu   f9,4(r5)
  1727.  
  1728.                 subic.  r4,r4,4                 ; four vertices done
  1729.                 bne     .loop2                  ; more left: re-check the count at .loop2
  1730.                 b       .done
  1731.  
        ; ---- unmasked path, fewer than 4 vertices left: one per iteration ----
  1732. .normal
  1733.  
  1734.  
  1735.                 lfs     f13,12(r6)              ; f13 = w
  1736.                 fcmpu   f13,f12                 ; w == 0 ?
  1737.                 beq     .zero2
        ; NOTE(review): as in .loop1, the raw fres estimate is used unrefined
        ; here, so these trailing vertices get a different precision than the
        ; ones handled by the quad path.
  1738.                 fres    f13,f13                 ; f13 = wInv (reciprocal estimate of w)
  1739.                 lfs     f7,0(r6)                ; x
  1740.                 fmuls   f7,f7,f13
  1741.                 lfs     f8,4(r6)                ; y
  1742.                 fmadds  f7,f1,f7,f4             ; sx * (x*wInv) + tx
  1743.                 fmuls   f8,f8,f13
  1744.                 lfs     f9,8(r6)                ; z
  1745.                 fmadds  f8,f2,f8,f5             ; sy * (y*wInv) + ty
  1746.                 stfsu   f7,4(r5)
  1747.                 fmuls   f9,f9,f13
  1748.                 stfsu   f8,4(r5)
  1749.                 fmadds  f9,f3,f9,f6             ; sz * (z*wInv) + tz
  1750.                 addi    r6,r6,16                ; next vClip entry
  1751.                 stfsu   f9,4(r5)
  1752.                 b       .next2
  1753. .zero2
  1754.                 li      r0,0                    ; w == 0: store three all-zero words
  1755.                 stwu    r0,4(r5)
  1756.                 stwu    r0,4(r5)
  1757.                 stwu    r0,4(r5)
  1758.                 addi    r6,r6,16                ; next vClip entry
  1759. .next2
  1760.                 subic.  r4,r4,1                 ; one vertex done
  1761.                 bne     .loop2                  ; count is below 4 here, so .loop2 falls to .normal again
  1762. .done
  1763.                 lfd     f26,-48(r1)             ; restore f26-f31 saved on entry
  1764.                 lfd     f27,-40(r1)
  1765.                 lfd     f28,-32(r1)
  1766.                 lfd     f29,-24(r1)
  1767.                 lfd     f30,-16(r1)
  1768.                 lfd     f31,-8(r1)
  1769.                 blr
  1770.  
  1771.                 section data
        ; Constants read with lfs ...(r2) by _asm_vp_map_vertices.
        ; NOTE(review): dc.s presumably emits a single-precision float, so these
        ; would be 0.0 and 2.0 - confirm against the assembler documentation.
  1772. fp_0            dc.s    0               ; loaded into f12 and compared against w and w*w
  1773. fp_2            dc.s    2               ; loaded into f11 for the est*(2 - est*w) refinement
  1774.